In [3]:
import os
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [10]:
from sklearn import clone
from sklearn import preprocessing
from sklearn import svm
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

import datetime as dt

In [6]:
# Load the pickled DataFrame (presumably bag-of-words features, per the file
# name) from the user's CLTK data directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only point this at a file you produced yourself.
fp_df = os.path.expanduser('~/cltk_data/user_data/tlg_bow_df.pickle')
dataframe_bow = joblib.load(fp_df)

In [7]:
# Target labels for classification: the 'epithet' column.
Y = dataframe_bow['epithet']

In [8]:
# Feature matrix: every column except the label ('epithet') and the two
# identifying columns ('id', 'author'). `axis` is passed by keyword because
# the positional form is deprecated and rejected by pandas 2.x.
X = dataframe_bow.drop(['epithet', 'id', 'author'], axis=1)

In [9]:
# Split features and labels into train and test partitions. The seed is fixed
# so the split is the same on every run; the test size is left at the
# library default.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

In [11]:
def scale_data(X_train, X_test, Y_train, Y_test):
    """Standardize the train and test feature matrices.

    A ``StandardScaler`` is fitted on the training features only and then
    applied to both partitions, so the scaled training data has zero mean
    and unit variance. The fitted scaler is pickled to
    ``~/cltk_data/user_data/tlg_bow_scaler.pickle`` so the same
    transformation can be reused later on testing/prediction data.

    Args:
        X_train: Training features; used to fit the scaler.
        X_test: Test features; transformed with the training statistics.
        Y_train: Training labels, returned unchanged.
        Y_test: Test labels, returned unchanged.

    Returns:
        Tuple ``(X_train_scaled, X_test_scaled, Y_train, Y_test)``.
    """
    scaler_path = os.path.expanduser('~/cltk_data/user_data/tlg_bow_scaler.pickle')

    print('Scaling data ...')
    started = dt.datetime.utcnow()

    # Fit on the training partition only; the test partition must not
    # influence the statistics used for scaling.
    scaler = preprocessing.StandardScaler()
    scaler.fit(X_train)

    # Persist the fitted scaler before transforming anything.
    joblib.dump(scaler, scaler_path)

    scaled_train, scaled_test = (
        scaler.transform(features) for features in (X_train, X_test)
    )

    print('... finished in {} secs.'.format(dt.datetime.utcnow() - started))
    print()

    return scaled_train, scaled_test, Y_train, Y_test

In [12]:
# Standardize both partitions; the labels pass through scale_data unchanged.
X_train_scaled, X_test_scaled, Y_train, Y_test = scale_data(X_train, X_test, Y_train, Y_test)


Scaling data ...
... finished in 0:00:08.612654 secs.

Decision tree


In [13]:
def run_tree(X_train_scaled, X_test_scaled, Y_train, Y_test,
             max_depth=None, random_state=0):
    """Fit a scikit-learn decision tree, pickle it, and print a test report.

    The fitted model is written to
    ``~/cltk_data/user_data/tlg_bow_dt.pickle``. The predictions, the true
    labels and a ``classification_report`` are printed to stdout.

    Args:
        X_train_scaled: Training features, passed to ``fit``.
        X_test_scaled: Test features, passed to ``predict``.
        Y_train: Training labels.
        Y_test: True labels of the test partition.
        max_depth: Forwarded to ``DecisionTreeClassifier``. ``None`` is
            scikit-learn's own default, so existing calls build the same
            kind of tree as before. This is the parameter to experiment with.
        random_state: Seed forwarded to ``DecisionTreeClassifier`` so that
            repeated runs give the same tree. Pass ``None`` to restore
            unseeded behaviour.

    Returns:
        None. Results are printed and the model is saved as a side effect.
    """
    print('Defining and fitting models ...')
    t0 = dt.datetime.utcnow()

    # Hyper-parameters are fixed up front here; they could instead be
    # learned from the data (e.g. by a parameter search).
    dec_tree = DecisionTreeClassifier(max_depth=max_depth,
                                      random_state=random_state)
    dec_tree.fit(X_train_scaled, Y_train)

    fp_model_pickle = os.path.expanduser('~/cltk_data/user_data/tlg_bow_dt.pickle')
    joblib.dump(dec_tree, fp_model_pickle)

    print('... finished in {} secs.'.format(dt.datetime.utcnow() - t0))
    print()

    Y_prediction_tree = dec_tree.predict(X_test_scaled)
    print('tree_predictions ', Y_prediction_tree)

    expected = Y_test
    print('actual_values   ', expected)

    print()
    print('----Tree_report--------------------------------')
    print(classification_report(expected, Y_prediction_tree))

In [14]:
# Train the decision tree on the scaled data and print its test-set report.
run_tree(X_train_scaled, X_test_scaled, Y_train, Y_test)


Defining and fitting models ...
... finished in 0:00:33.923361 secs.

tree_predictions  ['Musici' 'Historici/-ae' 'Philosophici/-ae' 'Astrologici' 'Tragici'
 'Comici' 'Tragici' 'Scriptores Ecclesiastici' 'Comici' 'Philosophici/-ae'
 'Philosophici/-ae' 'Comici' 'Comici' 'Historici/-ae' 'Tragici'
 'Historici/-ae' 'Theologici' 'Tragici' 'Historici/-ae' 'Gnostici'
 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Tragici' 'Tragici' 'Historici/-ae' 'Comici' 'Tragici'
 'Historici/-ae' 'Philosophici/-ae' 'Tragici' 'Historici/-ae'
 'Philosophici/-ae' 'Historici/-ae' 'Medici' 'Comici' 'Philosophici/-ae'
 'Historici/-ae' 'Bucolici' 'Apologetici' 'Elegiaci' 'Historici/-ae'
 'Historici/-ae' 'Tragici' 'Historici/-ae' 'Tragici' 'Historici/-ae'
 'Historici/-ae' 'Epigrammatici/-ae' 'Philosophici/-ae' 'Epici/-ae'
 'Philosophici/-ae' 'Epigrammatici/-ae' 'Historici/-ae' 'Comici'
 'Historici/-ae' 'Historici/-ae' 'Tragici' 'Theologici' 'Rhetorici'
 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Medici'
 'Philosophici/-ae' 'Poetae' 'Tragici' 'Historici/-ae' 'Comici'
 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae' 'Tactici' 'Epici/-ae'
 'Comici' 'Scriptores Ecclesiastici' 'Historici/-ae' 'Tragici' 'Comici'
 'Tragici' 'Epigrammatici/-ae' 'Tragici' 'Philosophici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Comici' 'Historici/-ae' 'Poetae' 'Epigrammatici/-ae'
 'Historici/-ae' 'Tragici' 'Oratores' 'Comici' 'Tragici' 'Epici/-ae'
 'Epici/-ae' 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Comici' 'Poetae' 'Philosophici/-ae' 'Epigrammatici/-ae'
 'Philosophici/-ae' 'Mathematici' 'Tragici' 'Epici/-ae' 'Rhetorici'
 'Sophistae' 'Poetae' 'Comici' 'Gnostici' 'Comici' 'Rhetorici'
 'Epigrammatici/-ae' 'Tragici' 'Philosophici/-ae' 'Philosophici/-ae'
 'Epici/-ae' 'Tragici' 'Tragici' 'Philosophici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Parodii' 'Tragici' 'Tragici' 'Mathematici' 'Tragici'
 'Poetae' 'Mechanici' 'Tragici' 'Philosophici/-ae' 'Tragici'
 'Historici/-ae' 'Epici/-ae' 'Tragici' 'Theologici' 'Epigrammatici/-ae'
 'Oratores' 'Medici' 'Philosophici/-ae' 'Historici/-ae' 'Medici' 'Comici'
 'Comici' 'Philosophici/-ae' 'Epici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Comici' 'Theologici' 'Historici/-ae' 'Tragici' 'Historici/-ae' 'Comici'
 'Historici/-ae' 'Theologici' 'Scriptores Ecclesiastici' 'Comici'
 'Philosophici/-ae' 'Biographi' 'Historici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Biographi' 'Tragici' 'Philosophici/-ae'
 'Philosophici/-ae' 'Tragici' 'Scriptores Ecclesiastici' 'Mythographi'
 'Historici/-ae' 'Scriptores Ecclesiastici' 'Philosophici/-ae' 'Tragici'
 'Medici' 'Comici' 'Sophistae' 'Tragici' 'Theologici' 'Tragici' 'Poetae'
 'Scriptores Ecclesiastici' 'Paradoxographi' 'Comici' 'Tragici'
 'Grammatici' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Scriptores Ecclesiastici' 'Tragici'
 'Scriptores Ecclesiastici' 'Oratores' 'Biographi' 'Historici/-ae'
 'Lyrici/-ae' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Tragici'
 'Lyrici/-ae' 'Theologici' 'Tactici' 'Comici' 'Tragici' 'Grammatici'
 'Philosophici/-ae' 'Philosophici/-ae' 'Philosophici/-ae' 'Comici'
 'Tragici' 'Comici' 'Theologici' 'Philosophici/-ae' 'Mathematici' 'Comici'
 'Sophistae' 'Historici/-ae' 'Epigrammatici/-ae' 'Philosophici/-ae'
 'Tragici' 'Historici/-ae' 'Comici' 'Historici/-ae' 'Philosophici/-ae'
 'Tragici' 'Lyrici/-ae' 'Epici/-ae' 'Paroemiographi' 'Historici/-ae'
 'Grammatici' 'Philosophici/-ae' 'Philosophici/-ae' 'Comici'
 'Historici/-ae' 'Epici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Comici' 'Elegiaci' 'Iambici' 'Grammatici'
 'Scriptores Ecclesiastici' 'Tragici' 'Scriptores Ecclesiastici' 'Comici'
 'Medici' 'Historici/-ae' 'Historici/-ae' 'Medici' 'Historici/-ae' 'Medici'
 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae' 'Mathematici'
 'Historici/-ae' 'Historici/-ae' 'Tragici' 'Epici/-ae' 'Historici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Tragici' 'Philosophici/-ae'
 'Philosophici/-ae' 'Epigrammatici/-ae' 'Tragici' 'Tragici' 'Sophistae'
 'Historici/-ae' 'Poetae' 'Epici/-ae' 'Medici' 'Philosophici/-ae'
 'Philosophici/-ae' 'Mathematici' 'Philosophici/-ae' 'Historici/-ae'
 'Comici' 'Theologici' 'Theologici' 'Alchemistae' 'Comici' 'Comici'
 'Historici/-ae' 'Tragici' 'Theologici' 'Philosophici/-ae' 'Medici'
 'Tragici' 'Gnostici' 'Scriptores Ecclesiastici' 'Tragici'
 'Epigrammatici/-ae' 'Epici/-ae' 'Tragici' 'Elegiaci' 'Poetae' 'Epici/-ae'
 'Philosophici/-ae' 'Poetae' 'Historici/-ae' 'Tragici' 'Philosophici/-ae'
 'Atticistae' 'Comici' 'Tragici' 'Parodii' 'Comici' 'Oratores' 'Comici'
 'Astronomici' 'Philosophici/-ae' 'Biographi' 'Astronomici' 'Medici'
 'Philosophici/-ae' 'Mathematici' 'Philosophici/-ae' 'Historici/-ae'
 'Theologici' 'Sophistae' 'Comici' 'Philosophici/-ae' 'Comici'
 'Philosophici/-ae' 'Historici/-ae' 'Comici' 'Scriptores Ecclesiastici'
 'Comici' 'Tragici' 'Philosophici/-ae' 'Iambici' 'Elegiaci' 'Epici/-ae'
 'Elegiaci' 'Scriptores Ecclesiastici' 'Poetae' 'Philosophici/-ae'
 'Scriptores Erotici' 'Elegiaci' 'Scriptores Ecclesiastici' 'Tragici'
 'Historici/-ae' 'Mathematici' 'Lyrici/-ae' 'Epigrammatici/-ae'
 'Historici/-ae' 'Grammatici' 'Historici/-ae' 'Scriptores Ecclesiastici'
 'Periegetae' 'Tragici' 'Historici/-ae' 'Historici/-ae' 'Tragici' 'Comici'
 'Comici' 'Historici/-ae' 'Philosophici/-ae' 'Apologetici'
 'Scriptores Ecclesiastici']
actual_values    11      Scriptores Ecclesiastici
440                Historici/-ae
1352           Epigrammatici/-ae
592                  Apologetici
1178                     Tragici
413                       Poetae
1458                  Lyrici/-ae
1621                   Biographi
715                    Geographi
214                   Grammatici
958             Philosophici/-ae
1669                      Comici
555                      Tragici
1092               Historici/-ae
874                       Comici
513                Historici/-ae
1424                  Theologici
1381                      Comici
99                 Historici/-ae
611             Philosophici/-ae
1291            Philosophici/-ae
377                Historici/-ae
1142                   Rhetorici
425                       Medici
238                Historici/-ae
943                       Comici
559                       Comici
714                   Grammatici
249                   Grammatici
838                Poetae Medici
                  ...           
1182                      Comici
649                   Lyrici/-ae
830                    Epici/-ae
672                      Tragici
76                 Historici/-ae
963                       Poetae
647             Philosophici/-ae
109               Paroemiographi
1718                      Comici
948                   Theologici
1447                   Epici/-ae
48                 Historici/-ae
1253                   Geographi
92                        Comici
1641                     Tragici
1562            Philosophici/-ae
1367          Scriptores Erotici
849                Historici/-ae
302                   Theologici
800                Historici/-ae
870                   Lyrici/-ae
720                    Biographi
231                Historici/-ae
193                       Comici
1751                      Comici
382                   Lyrici/-ae
10                 Historici/-ae
1157                  Grammatici
1660                 Hymnographi
659             Philosophici/-ae
Name: epithet, dtype: object

----Tree_report--------------------------------
                          precision    recall  f1-score   support

             Alchemistae       1.00      0.20      0.33         5
             Apologetici       0.00      0.00      0.00         3
             Astrologici       0.00      0.00      0.00         0
             Astronomici       0.00      0.00      0.00         7
              Atticistae       0.00      0.00      0.00         1
               Biographi       0.00      0.00      0.00         3
                Bucolici       0.00      0.00      0.00         0
            Chronographi       0.00      0.00      0.00         1
                  Comici       0.36      0.38      0.37        40
                Elegiaci       0.17      0.17      0.17         6
               Epici/-ae       0.27      0.27      0.27        15
       Epigrammatici/-ae       0.00      0.00      0.00         5
          Epistolographi       0.00      0.00      0.00         1
               Geographi       0.00      0.00      0.00         5
                 Gnomici       0.00      0.00      0.00         1
                Gnostici       0.00      0.00      0.00         1
              Grammatici       0.00      0.00      0.00        14
           Historici/-ae       0.66      0.62      0.64        84
             Hymnographi       0.00      0.00      0.00         1
                 Iambici       0.00      0.00      0.00         2
            Lexicographi       0.00      0.00      0.00         1
              Lyrici/-ae       0.00      0.00      0.00        13
             Mathematici       0.14      0.33      0.20         3
               Mechanici       0.00      0.00      0.00         0
                  Medici       0.36      0.50      0.42         8
                  Musici       0.00      0.00      0.00         2
             Mythographi       0.00      0.00      0.00         0
                Oratores       0.00      0.00      0.00         1
          Paradoxographi       0.00      0.00      0.00         4
                 Parodii       0.00      0.00      0.00         1
          Paroemiographi       0.00      0.00      0.00         1
              Periegetae       0.00      0.00      0.00         5
        Philosophici/-ae       0.45      0.46      0.45        59
                  Poetae       0.10      0.11      0.11         9
           Poetae Medici       0.00      0.00      0.00         1
           Polyhistorici       0.00      0.00      0.00         1
               Rhetorici       0.00      0.00      0.00         8
Scriptores Ecclesiastici       0.12      0.22      0.16         9
      Scriptores Erotici       0.00      0.00      0.00         1
    Scriptores Fabularum       0.00      0.00      0.00         1
               Sophistae       0.40      0.25      0.31         8
                 Tactici       0.00      0.00      0.00         2
              Theologici       0.42      0.42      0.42        12
                 Tragici       0.24      0.46      0.31        28

             avg / total       0.34      0.34      0.33       373

/root/venv/lib/python3.5/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
/root/venv/lib/python3.5/site-packages/sklearn/metrics/classification.py:1115: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples.
  'recall', 'true', average, warn_for)

Random forest


In [15]:
def run_random_forest(X_train_scaled, X_test_scaled, Y_train, Y_test,
                      n_estimators=30, random_state=0):
    """Fit a scikit-learn random forest, pickle it, and print a test report.

    The fitted model is written to
    ``~/cltk_data/user_data/tlg_bow_fandom_forest.pickle``. The predictions,
    the true labels and a ``classification_report`` are printed to stdout.

    Args:
        X_train_scaled: Training features, passed to ``fit``.
        X_test_scaled: Test features, passed to ``predict``.
        Y_train: Training labels.
        Y_test: True labels of the test partition.
        n_estimators: Number of trees in the forest (default 30, the value
            previously hard-coded). This is the parameter to experiment with.
        random_state: Seed forwarded to ``RandomForestClassifier`` so that
            repeated runs give the same forest. Pass ``None`` to restore
            unseeded behaviour.

    Returns:
        None. Results are printed and the model is saved as a side effect.
    """
    rf_model = RandomForestClassifier(n_estimators=n_estimators,
                                      random_state=random_state)

    # Train. `fit` returns the estimator itself, so no clone is needed.
    clf = rf_model.fit(X_train_scaled, Y_train)

    # NOTE(review): 'fandom' looks like a typo for 'random'. The file name is
    # kept as-is so anything already reading this pickle still finds it.
    fp_model_pickle = os.path.expanduser('~/cltk_data/user_data/tlg_bow_fandom_forest.pickle')
    joblib.dump(clf, fp_model_pickle)

    Y_prediction = clf.predict(X_test_scaled)
    print('tree_predictions ', Y_prediction)

    expected = Y_test
    print('actual_values   ', expected)

    print()
    print('----Random forest report--------------------------------')
    print(classification_report(expected, Y_prediction))

In [16]:
# Train the random forest on the scaled data and print its test-set report.
run_random_forest(X_train_scaled, X_test_scaled, Y_train, Y_test)


tree_predictions  ['Philosophici/-ae' 'Historici/-ae' 'Epici/-ae' 'Philosophici/-ae'
 'Tragici' 'Comici' 'Lyrici/-ae' 'Scriptores Ecclesiastici' 'Historici/-ae'
 'Grammatici' 'Philosophici/-ae' 'Comici' 'Tragici' 'Historici/-ae'
 'Tragici' 'Historici/-ae' 'Theologici' 'Tragici' 'Historici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Grammatici' 'Medici'
 'Historici/-ae' 'Tragici' 'Tragici' 'Historici/-ae' 'Comici' 'Epici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Tragici' 'Historici/-ae'
 'Philosophici/-ae' 'Historici/-ae' 'Medici' 'Comici' 'Philosophici/-ae'
 'Historici/-ae' 'Epici/-ae' 'Historici/-ae' 'Comici' 'Historici/-ae'
 'Historici/-ae' 'Tragici' 'Historici/-ae' 'Tragici' 'Historici/-ae'
 'Historici/-ae' 'Epici/-ae' 'Philosophici/-ae' 'Comici' 'Historici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Tragici' 'Historici/-ae' 'Historici/-ae'
 'Tragici' 'Scriptores Ecclesiastici' 'Medici' 'Philosophici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Tragici' 'Historici/-ae' 'Comici' 'Historici/-ae'
 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Comici' 'Philosophici/-ae' 'Scriptores Ecclesiastici' 'Tragici'
 'Historici/-ae' 'Tragici' 'Poetae' 'Lyrici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Scriptores Ecclesiastici' 'Historici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Comici' 'Historici/-ae'
 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Comici' 'Tragici' 'Epici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Tragici' 'Tragici'
 'Philosophici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Epici/-ae' 'Historici/-ae'
 'Philosophici/-ae' 'Tragici' 'Comici' 'Tragici' 'Historici/-ae' 'Tragici'
 'Tragici' 'Comici' 'Philosophici/-ae' 'Philosophici/-ae' 'Historici/-ae'
 'Tragici' 'Tragici' 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Tragici' 'Tragici' 'Philosophici/-ae' 'Tragici'
 'Philosophici/-ae' 'Philosophici/-ae' 'Tragici' 'Philosophici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Lyrici/-ae' 'Comici' 'Theologici'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Medici' 'Philosophici/-ae' 'Comici' 'Philosophici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Tragici' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Scriptores Ecclesiastici' 'Historici/-ae' 'Epici/-ae'
 'Philosophici/-ae' 'Scriptores Ecclesiastici' 'Historici/-ae' 'Comici'
 'Philosophici/-ae' 'Philosophici/-ae' 'Philosophici/-ae' 'Comici'
 'Philosophici/-ae' 'Tragici' 'Philosophici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Scriptores Ecclesiastici' 'Historici/-ae' 'Tragici'
 'Philosophici/-ae' 'Historici/-ae' 'Philosophici/-ae' 'Tragici'
 'Theologici' 'Tragici' 'Historici/-ae' 'Scriptores Ecclesiastici'
 'Philosophici/-ae' 'Tragici' 'Tragici' 'Philosophici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Scriptores Ecclesiastici'
 'Tragici' 'Scriptores Ecclesiastici' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Grammatici' 'Philosophici/-ae' 'Theologici'
 'Historici/-ae' 'Tragici' 'Tragici' 'Philosophici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Comici' 'Comici' 'Tragici' 'Philosophici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Comici' 'Comici' 'Philosophici/-ae'
 'Historici/-ae' 'Comici' 'Philosophici/-ae' 'Tragici' 'Historici/-ae'
 'Comici' 'Historici/-ae' 'Philosophici/-ae' 'Tragici' 'Comici'
 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Lyrici/-ae' 'Historici/-ae'
 'Epici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Philosophici/-ae' 'Tragici' 'Comici' 'Scriptores Ecclesiastici'
 'Scriptores Ecclesiastici' 'Tragici' 'Comici' 'Tragici' 'Philosophici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Medici' 'Philosophici/-ae' 'Medici'
 'Comici' 'Historici/-ae' 'Historici/-ae' 'Comici' 'Medici' 'Historici/-ae'
 'Tragici' 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Lyrici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Tragici' 'Tragici' 'Sophistae' 'Philosophici/-ae'
 'Tragici' 'Epici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Comici' 'Theologici'
 'Theologici' 'Philosophici/-ae' 'Historici/-ae' 'Epici/-ae'
 'Historici/-ae' 'Tragici' 'Scriptores Ecclesiastici' 'Historici/-ae'
 'Theologici' 'Tragici' 'Philosophici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Philosophici/-ae' 'Comici' 'Tragici' 'Tragici'
 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae' 'Poetae'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Mathematici' 'Tragici'
 'Historici/-ae' 'Historici/-ae' 'Comici' 'Philosophici/-ae' 'Tragici'
 'Historici/-ae' 'Tragici' 'Medici' 'Philosophici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Comici' 'Alchemistae' 'Tragici'
 'Philosophici/-ae' 'Philosophici/-ae' 'Sophistae' 'Philosophici/-ae'
 'Comici' 'Tragici' 'Grammatici' 'Comici' 'Sophistae' 'Epici/-ae'
 'Epici/-ae' 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Comici' 'Scriptores Ecclesiastici' 'Tragici'
 'Historici/-ae' 'Historici/-ae' 'Lyrici/-ae' 'Comici' 'Historici/-ae'
 'Historici/-ae' 'Philosophici/-ae' 'Philosophici/-ae' 'Historici/-ae'
 'Tragici' 'Philosophici/-ae' 'Historici/-ae' 'Tragici' 'Philosophici/-ae'
 'Comici' 'Historici/-ae' 'Philosophici/-ae' 'Scriptores Ecclesiastici'
 'Philosophici/-ae']
actual_values    11      Scriptores Ecclesiastici
440                Historici/-ae
1352           Epigrammatici/-ae
592                  Apologetici
1178                     Tragici
413                       Poetae
1458                  Lyrici/-ae
1621                   Biographi
715                    Geographi
214                   Grammatici
958             Philosophici/-ae
1669                      Comici
555                      Tragici
1092               Historici/-ae
874                       Comici
513                Historici/-ae
1424                  Theologici
1381                      Comici
99                 Historici/-ae
611             Philosophici/-ae
1291            Philosophici/-ae
377                Historici/-ae
1142                   Rhetorici
425                       Medici
238                Historici/-ae
943                       Comici
559                       Comici
714                   Grammatici
249                   Grammatici
838                Poetae Medici
                  ...           
1182                      Comici
649                   Lyrici/-ae
830                    Epici/-ae
672                      Tragici
76                 Historici/-ae
963                       Poetae
647             Philosophici/-ae
109               Paroemiographi
1718                      Comici
948                   Theologici
1447                   Epici/-ae
48                 Historici/-ae
1253                   Geographi
92                        Comici
1641                     Tragici
1562            Philosophici/-ae
1367          Scriptores Erotici
849                Historici/-ae
302                   Theologici
800                Historici/-ae
870                   Lyrici/-ae
720                    Biographi
231                Historici/-ae
193                       Comici
1751                      Comici
382                   Lyrici/-ae
10                 Historici/-ae
1157                  Grammatici
1660                 Hymnographi
659             Philosophici/-ae
Name: epithet, dtype: object

----Random forest report--------------------------------
                          precision    recall  f1-score   support

             Alchemistae       1.00      0.20      0.33         5
             Apologetici       0.00      0.00      0.00         3
             Astronomici       0.00      0.00      0.00         7
              Atticistae       0.00      0.00      0.00         1
               Biographi       0.00      0.00      0.00         3
            Chronographi       0.00      0.00      0.00         1
                  Comici       0.64      0.57      0.61        40
                Elegiaci       0.00      0.00      0.00         6
               Epici/-ae       0.25      0.20      0.22        15
       Epigrammatici/-ae       0.00      0.00      0.00         5
          Epistolographi       0.00      0.00      0.00         1
               Geographi       0.00      0.00      0.00         5
                 Gnomici       0.00      0.00      0.00         1
                Gnostici       0.00      0.00      0.00         1
              Grammatici       0.75      0.21      0.33        14
           Historici/-ae       0.57      0.89      0.70        84
             Hymnographi       0.00      0.00      0.00         1
                 Iambici       0.00      0.00      0.00         2
            Lexicographi       0.00      0.00      0.00         1
              Lyrici/-ae       0.17      0.08      0.11        13
             Mathematici       1.00      0.33      0.50         3
                  Medici       0.62      0.62      0.62         8
                  Musici       0.00      0.00      0.00         2
                Oratores       0.00      0.00      0.00         1
          Paradoxographi       0.00      0.00      0.00         4
                 Parodii       0.00      0.00      0.00         1
          Paroemiographi       0.00      0.00      0.00         1
              Periegetae       0.00      0.00      0.00         5
        Philosophici/-ae       0.43      0.64      0.52        59
                  Poetae       0.00      0.00      0.00         9
           Poetae Medici       0.00      0.00      0.00         1
           Polyhistorici       0.00      0.00      0.00         1
               Rhetorici       0.00      0.00      0.00         8
Scriptores Ecclesiastici       0.20      0.33      0.25         9
      Scriptores Erotici       0.00      0.00      0.00         1
    Scriptores Fabularum       0.00      0.00      0.00         1
               Sophistae       0.33      0.12      0.18         8
                 Tactici       0.00      0.00      0.00         2
              Theologici       0.57      0.33      0.42        12
                 Tragici       0.29      0.61      0.39        28

             avg / total       0.40      0.47      0.40       373

/root/venv/lib/python3.5/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

SVC


In [ ]:
def run_svc(X_train_scaled, X_test_scaled, Y_train, Y_test):
    """Fit a linear SVC with scikit-learn, pickle it, and print a test report.

    The model is a ``LinearSVC`` with a fixed ``C=100.``; the value is
    pre-defined here, not learned from the data. The fitted model is
    written to ``~/cltk_data/user_data/tlg_bow_svc.pickle``. The predictions,
    the true labels and a ``classification_report`` are printed to stdout.

    Args:
        X_train_scaled: Training features, passed to ``fit``.
        X_test_scaled: Test features, passed to ``predict``.
        Y_train: Training labels.
        Y_test: True labels of the test partition.

    Returns:
        None. Results are printed and the model is saved as a side effect.
    """
    model_path = os.path.expanduser('~/cltk_data/user_data/tlg_bow_svc.pickle')

    print('Defining and fitting SVC model ...')
    started = dt.datetime.utcnow()

    # `fit` returns the estimator itself, so construction and training chain.
    linear_svc = svm.LinearSVC(C=100.).fit(X_train_scaled, Y_train)
    joblib.dump(linear_svc, model_path)

    elapsed = dt.datetime.utcnow() - started
    print('... finished in {} secs.'.format(elapsed))
    print()

    predicted = linear_svc.predict(X_test_scaled)
    print('svc_predictions ', predicted)
    print('actual_values   ', Y_test)

    print()
    print('----SVC_report--------------------------------')
    print(classification_report(Y_test, predicted))

In [ ]:
# Train the linear SVC on the scaled data and print its test-set report.
run_svc(X_train_scaled, X_test_scaled, Y_train, Y_test)

AdaBoost


In [ ]:
def run_ada_boost(X_train_scaled, X_test_scaled, Y_train, Y_test,
                  n_estimators=30, random_state=0):
    """Fit a scikit-learn AdaBoost classifier, pickle it, and print a report.

    The ensemble boosts depth-3 decision trees. The fitted model is written
    to ``~/cltk_data/user_data/tlg_bow_ada_boost.pickle``. The predictions,
    the true labels and a ``classification_report`` are printed to stdout.

    For plotting see:
    http://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_iris.html

    Args:
        X_train_scaled: Training features, passed to ``fit``.
        X_test_scaled: Test features, passed to ``predict``.
        Y_train: Training labels.
        Y_test: True labels of the test partition.
        n_estimators: Maximum number of boosting rounds (default 30, the
            value previously hard-coded). This is the parameter to
            experiment with.
        random_state: Seed forwarded to ``AdaBoostClassifier`` so that
            repeated runs give the same ensemble. Pass ``None`` to restore
            unseeded behaviour.

    Returns:
        None. Results are printed and the model is saved as a side effect.
    """
    ada_classifier = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3),
                                        n_estimators=n_estimators,
                                        random_state=random_state)

    # Train. `fit` returns the estimator itself, so no clone is needed.
    clf = ada_classifier.fit(X_train_scaled, Y_train)

    fp_model_pickle = os.path.expanduser('~/cltk_data/user_data/tlg_bow_ada_boost.pickle')
    joblib.dump(clf, fp_model_pickle)

    Y_prediction = clf.predict(X_test_scaled)
    print('tree_predictions ', Y_prediction)

    expected = Y_test
    print('actual_values   ', expected)

    print()
    print(classification_report(expected, Y_prediction))

In [ ]:
# Train the AdaBoost ensemble on the scaled data and print its test-set report.
run_ada_boost(X_train_scaled, X_test_scaled, Y_train, Y_test)